library(tidyverse)
library(interactions) # for probe_interactions() plot
salary <- read_csv("../../data/salary_lec.csv", show_col_types = FALSE) |>
mutate(
envt = factor(ifelse(dept == 1, 'nature', 'urban'))
)
# make the acct slope a bit steeper to illustrate the point better
outdoors <- salary |>
mutate(
salary_aug = ifelse(
envt == 'nature',
salary + (service^2)/1.5 - 10,
salary
),
wellbeing = salary_aug
) |>
rename(
outdoor_time = service
) |>
select(wellbeing, envt, outdoor_time)
palette_probe <- c('#4ab8fc', '#ff7b01')
outdoors |>
ggplot(aes(x = outdoor_time, y = wellbeing, colour = envt)) +
geom_point(size = 5) +
geom_smooth(method = 'lm', se = F) +
# facet_wrap(~ envt) +
# theme(legend.position = 'none') +
scale_color_manual(values = palette_probe) +
NULL
envt:
contrasts(outdoors$envt)
urban
nature 0
urban 1
wellbeing:
outdoor_time:
m1 <- lm(wellbeing ~ outdoor_time + envt, data = outdoors)
summary(m1)
Call:
lm(formula = wellbeing ~ outdoor_time + envt, data = outdoors)
Residuals:
Min 1Q Median 3Q Max
-15.1784 -4.1564 -0.6475 3.4551 20.1555
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 19.0465 4.9210 3.870 0.000334 ***
outdoor_time 7.7724 0.9493 8.188 1.34e-10 ***
envturban -26.2466 1.9600 -13.391 < 2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 6.895 on 47 degrees of freedom
Multiple R-squared: 0.8511, Adjusted R-squared: 0.8447
F-statistic: 134.3 on 2 and 47 DF, p-value: < 2.2e-16
outdoors_probedat <- outdoors |>
mutate(
pred = outdoor_time,
modx_group = envt
)
plot_m1_probe <- probe_interaction(
model = m1,
pred = outdoor_time,
modx = envt,
interval = T
)$interactplot +
geom_point(data = outdoors_probedat, size = 5)
plot_data_probe <- plot_m1_probe
plot_data_probe$layers[[1]] <- NULL
plot_data_probe$layers[[1]] <- NULL
plot_m1_probe
meeting in the middle means that neither line fits the data very well
plot_data_probe
coef(m1)
(Intercept) outdoor_time envturban
19.046503 7.772417 -26.246592
Compute simple slopes.
\[ \begin{align} \widehat{wellbeing} &= \beta_0 + (\beta_1 \cdot outdoor_time) + (\beta_2 \cdot envt) \\ \widehat{wellbeing} &= 19 + (7.8 \cdot outdoor_time) + (-26.2 \cdot envt) \\ \end{align} \]
When \(envt = 0\) (nature):
\[ \begin{align} \widehat{wellbeing}_{envt=0} &= 19 + (7.8 \cdot outdoor_time) + (-26.2 \cdot 0) \\ \widehat{wellbeing}_{envt=0} &= 19 + (7.8 \cdot outdoor_time) \\ \end{align} \] When \(envt = 1\) (urban):
\[ \begin{align} \widehat{wellbeing}_{envt=1} &= 19 + (7.8 \cdot outdoor_time) + (-26.2 \cdot 1) \\ \widehat{wellbeing}_{envt=1} &= 19 - 26.2 + (7.8 \cdot outdoor_time) \\ \widehat{wellbeing}_{envt=1} &= 9 + (7.8 \cdot outdoor_time) \\ \end{align} \]
m2 <- lm(wellbeing ~ outdoor_time + envt + outdoor_time:envt, data = outdoors)
summary(m2)
Call:
lm(formula = wellbeing ~ outdoor_time + envt + outdoor_time:envt,
data = outdoors)
Residuals:
Min 1Q Median 3Q Max
-10.0029 -2.9519 -0.4881 3.1881 11.2969
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.3876 4.5651 -0.742 0.46183
outdoor_time 12.2887 0.8982 13.682 < 2e-16 ***
envturban 20.2899 6.5022 3.120 0.00312 **
outdoor_time:envturban -9.5597 1.3067 -7.316 3.07e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.738 on 46 degrees of freedom
Multiple R-squared: 0.9312, Adjusted R-squared: 0.9267
F-statistic: 207.4 on 3 and 46 DF, p-value: < 2.2e-16
coef(m2)
(Intercept) outdoor_time envturban outdoor_time:envturban
-3.387579 12.288744 20.289905 -9.559714
\[ \begin{align} \widehat{wellbeing} &= \beta_0 + (\beta_1 \cdot serv) + (\beta_2 \cdot envt) + (\beta_3 \cdot serv \cdot envt)\\ \widehat{wellbeing} &= -3.4 + (12.3 \cdot serv) + (20.3 \cdot envt) + (-9.6 \cdot serv \cdot envt)\\ \end{align} \]
When \(envt = 0\) (nature):
\[ \begin{align} \widehat{wellbeing}_{acct} &= -3.4 + (12.3 \cdot serv) + (20.3 \cdot envt) + (-9.6 \cdot serv \cdot envt)\\ \widehat{wellbeing}_{acct} &= -3.4 + (12.3 \cdot serv) + (20.3 \cdot 0) + (-9.6 \cdot serv \cdot 0)\\ \widehat{wellbeing}_{acct} &= -3.4 + (12.3 \cdot serv)\\ \end{align} \]
When \(envt = 1\) (urban):
\[ \begin{align} \widehat{wellbeing}_{mng} &= -3.4 + (12.3 \cdot serv) + (20.3 \cdot envt) + (-9.6 \cdot serv \cdot envt)\\ \widehat{wellbeing}_{mng} &= -3.4 + (12.3 \cdot serv) + (20.3 \cdot 1) + (-9.6 \cdot serv \cdot 1)\\ \widehat{wellbeing}_{mng} &= -3.4 + 20.3 + (12.3 \cdot serv) + (-9.6 \cdot serv)\\ \widehat{wellbeing}_{mng} &= 16.9 + (12.3 \cdot serv) + (-9.6 \cdot serv)\\ \widehat{wellbeing}_{mng} &= 16.9 + ((12.3 - 9.6) \cdot serv) \\ \widehat{wellbeing}_{mng} &= 16.9 + (2.7 \cdot serv)\\ \end{align} \]
When envt = 0 (nature):
coef(m2)[['(Intercept)']] # intercept
[1] -3.387579
coef(m2)[['outdoor_time']] # slope
[1] 12.28874
When envt = 1 (urban)
coef(m2)[['(Intercept)']] + coef(m2)[['envturban']] # intercept
[1] 16.90233
coef(m2)[['outdoor_time']] + coef(m2)[['outdoor_time:envturban']] # slope
[1] 2.72903
plot_m2_probe <- probe_interaction(
model = m2,
pred = outdoor_time,
modx = envt,
interval = T
)$interactplot +
geom_point(data = outdoors_probedat, size = 3) +
NULL
plot_m2_probe
m2_simple_effs <- tibble(
envt = c('nature', 'urban'),
modx_group = c('nature', 'urban'),
int = c(
coef(m2)[['(Intercept)']],
coef(m2)[['(Intercept)']] + coef(m2)[['envturban']]
),
slp = c(
coef(m2)[['outdoor_time']],
coef(m2)[['outdoor_time']] + coef(m2)[['outdoor_time:envturban']]
)
)
plot_m2_probe_intercept <- plot_m2_probe
plot_m2_probe_intercept$layers[[1]] <- NULL
plot_m2_probe_intercept +
xlim(-7, 7) +
ylim(-10, 100) +
geom_abline(
data = m2_simple_effs,
aes(intercept = int, slope = slp, colour = envt, linetype = envt),
linewidth = 1.5
) +
geom_point(
data = m2_simple_effs,
x = 0,
aes(y = int),
size = 5,
shape = 15
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
labs(
subtitle = 'outdoor_time as-is'
) +
theme(
legend.position = 'bottom'
) +
NULL
Plot predictor as-is.
outdoors |>
ggplot(aes(x = outdoor_time, y = wellbeing)) +
geom_vline(xintercept = 0, colour = 'red', linewidth = 2) +
geom_point() +
NULL
Plot min-shifted predictor.
outdoors <- outdoors |>
mutate(
outdoor_time_min = outdoor_time - min(outdoor_time)
)
outdoors |>
ggplot(aes(x = outdoor_time_min, y = wellbeing)) +
geom_vline(xintercept = 0, colour = 'red', linewidth = 2) +
geom_point() +
NULL
0 is meaningful now = the minimum length of outdoor_time.
m3 <- lm(wellbeing ~ outdoor_time_min + envt + outdoor_time_min:envt, data = outdoors)
summary(m3)
Call:
lm(formula = wellbeing ~ outdoor_time_min + envt + outdoor_time_min:envt,
data = outdoors)
Residuals:
Min 1Q Median 3Q Max
-10.0029 -2.9519 -0.4881 3.1881 11.2969
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 28.6649 2.3291 12.307 3.72e-16 ***
outdoor_time_min 12.2887 0.8982 13.682 < 2e-16 ***
envturban -4.6445 3.2455 -1.431 0.159
outdoor_time_min:envturban -9.5597 1.3067 -7.316 3.07e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.738 on 46 degrees of freedom
Multiple R-squared: 0.9312, Adjusted R-squared: 0.9267
F-statistic: 207.4 on 3 and 46 DF, p-value: < 2.2e-16
When envt = 0 (acct):
coef(m3)[['(Intercept)']] # intercept
[1] 28.66493
coef(m3)[['outdoor_time_min']] # slope
[1] 12.28874
When envt = 1 (mng)
coef(m3)[['(Intercept)']] + coef(m3)[['envturban']] # intercept
[1] 24.02041
coef(m3)[['outdoor_time_min']] + coef(m3)[['outdoor_time_min:envturban']] # slope
[1] 2.72903
outdoors_probedat <- outdoors_probedat |>
mutate(
outdoor_time_min = outdoor_time - min(outdoor_time)
)
plot_m3_probe <- probe_interaction(
model = m3,
pred = outdoor_time_min,
modx = envt,
interval = T
)$interactplot +
geom_point(data = outdoors_probedat, size = 3)
plot_m3_probe
m3_simple_effs <- tibble(
envt = c('nature', 'urban'),
modx_group = c('nature', 'urban'),
int = c(
coef(m3)[['(Intercept)']],
coef(m3)[['(Intercept)']] + coef(m3)[['envturban']]
),
slp = c(
coef(m3)[['outdoor_time_min']],
coef(m3)[['outdoor_time_min']] + coef(m3)[['outdoor_time_min:envturban']]
)
)
plot_m3_probe_intercept <- plot_m3_probe
plot_m3_probe_intercept$layers[[1]] <- NULL
plot_m3_probe_intercept +
xlim(-7, 7) +
ylim(-10, 100) +
geom_abline(
data = m3_simple_effs,
aes(intercept = int, slope = slp, colour = envt, linetype = envt),
linewidth = 1.5
) +
geom_point(
data = m3_simple_effs,
x = 0,
aes(y = int),
size = 5,
shape = 15
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
labs(
subtitle = 'outdoor_time min-shifted'
) +
theme(
legend.position = 'bottom'
) +
NULL
Plot centered predictor.
outdoors <- outdoors |>
mutate(
outdoor_time_c = scale(outdoor_time, center = TRUE, scale = FALSE)
)
outdoors |>
ggplot(aes(x = outdoor_time_c, y = wellbeing)) +
geom_vline(xintercept = 0, colour = 'red', linewidth = 2) +
geom_point() +
NULL
round(mean(outdoors$outdoor_time_c))
[1] 0
m4 <- lm(wellbeing ~ outdoor_time_c + envt + outdoor_time_c:envt, data = outdoors)
summary(m4)
Call:
lm(formula = wellbeing ~ outdoor_time_c + envt + outdoor_time_c:envt,
data = outdoors)
Residuals:
Min 1Q Median 3Q Max
-10.0029 -2.9519 -0.4881 3.1881 11.2969
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 56.4513 0.9712 58.124 < 2e-16 ***
outdoor_time_c 12.2887 0.8982 13.682 < 2e-16 ***
envturban -26.2602 1.3469 -19.496 < 2e-16 ***
outdoor_time_c:envturban -9.5597 1.3067 -7.316 3.07e-09 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 4.738 on 46 degrees of freedom
Multiple R-squared: 0.9312, Adjusted R-squared: 0.9267
F-statistic: 207.4 on 3 and 46 DF, p-value: < 2.2e-16
outdoors_probedat <- outdoors_probedat |>
mutate(
outdoor_time_c = scale(outdoor_time, center = TRUE, scale = FALSE)
)
plot_m4_probe <- probe_interaction(
model = m4,
pred = outdoor_time_c,
modx = envt,
interval = T
)$interactplot +
geom_point(data = outdoors_probedat, size = 3)
plot_m4_probe
m4_simple_effs <- tibble(
envt = c('nature', 'urban'),
modx_group = c('nature', 'urban'),
int = c(
coef(m4)[['(Intercept)']],
coef(m4)[['(Intercept)']] + coef(m4)[['envturban']]
),
slp = c(
coef(m4)[['outdoor_time_c']],
coef(m4)[['outdoor_time_c']] + coef(m4)[['outdoor_time_c:envturban']]
)
)
plot_m4_probe_intercept <- plot_m4_probe
plot_m4_probe_intercept$layers[[1]] <- NULL
plot_m4_probe_intercept +
xlim(-7, 7) +
ylim(-10, 100) +
geom_abline(
data = m4_simple_effs,
aes(intercept = int, slope = slp, colour = envt, linetype = envt),
linewidth = 1.5
) +
geom_point(
data = m4_simple_effs,
x = 0,
aes(y = int),
size = 5,
shape = 15
) +
geom_vline(xintercept = 0, linetype = 'dotted') +
labs(
subtitle = 'outdoor_time mean-centered'
) +
theme(
legend.position = 'bottom'
) +
NULL
With outdoor_time as-is:
summary(m2)$coefficients |> round(3)
Estimate Std. Error t value Pr(>|t|)
(Intercept) -3.388 4.565 -0.742 0.462
outdoor_time 12.289 0.898 13.682 0.000
envturban 20.290 6.502 3.120 0.003
outdoor_time:envturban -9.560 1.307 -7.316 0.000
With outdoor_time min-shifted:
summary(m3)$coefficients |> round(3)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 28.665 2.329 12.307 0.000
outdoor_time_min 12.289 0.898 13.682 0.000
envturban -4.645 3.246 -1.431 0.159
outdoor_time_min:envturban -9.560 1.307 -7.316 0.000
With outdoor_time mean-centered:
summary(m4)$coefficients |> round(3)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 56.451 0.971 58.124 0
outdoor_time_c 12.289 0.898 13.682 0
envturban -26.260 1.347 -19.496 0
outdoor_time_c:envturban -9.560 1.307 -7.316 0